##########################################
#######Getting the data set up for IRR package####
##########################################
library(reshape)
library(irr)

# procfile.csv is the raw data file produced by the coding process. It holds
# two sets of codes for each free response text.
procfile <- read.csv(file = "procfile.csv")

# Keep columns 3 through 22 of the raw file; the reliability sections below
# pick their columns (by position) out of this subset.
frdatairr <- procfile[, 3:22]

# Same rows, ordered by responseID.
fr <- frdatairr[order(frdatairr[["responseID"]]), ]

#########################################
###IRR for Category Coding###############
#########################################

# Columns 1 and 2 are the identifier columns (responseID / coderID, used by
# name below); column 12 is the code whose agreement is measured here.
frdata_category2 <- frdatairr[, c(1, 2, 12)]

# Recode missing codes as 0 so a missing code counts as a value of its own.
# A logical mask is used rather than which(): when nothing is missing,
# which() yields an empty index and the data-frame assignment emits a
# spurious "no non-missing arguments to max" warning.
frdata_category2[is.na(frdata_category2[, 3]), 3] <- 0

# Long format, useful for checking the particular responseIDs on which the
# coders did not agree. (id.vars is spelled out instead of relying on partial
# matching of `id`.)
frdata_category_v <- melt(
  frdata_category2,
  id.vars = c("responseID", "coderID")
)

# Wide format for the IRR computation: one row per responseID and one code
# column per coderID.
frdata_category <- reshape(
  frdata_category2,
  idvar = "responseID",
  timevar = "coderID",
  direction = "wide"
)

# Drop the responseID column and transpose so each row is a coder, then
# compute Krippendorff's alpha (default method).
kripp.alpha(t(frdata_category[, -1]))


#########################################
###IRR for SubCategory Coding###############
#########################################

# Columns 1 and 2 are the identifier columns (responseID / coderID, used by
# name below); column 18 is the code whose agreement is measured here.
frdata_category2 <- frdatairr[, c(1, 2, 18)]

# Recode missing codes as 0 so a missing code counts as a value of its own.
# A logical mask is used rather than which(): when nothing is missing,
# which() yields an empty index and the data-frame assignment emits a
# spurious "no non-missing arguments to max" warning.
frdata_category2[is.na(frdata_category2[, 3]), 3] <- 0

# Long format, useful for checking the particular responseIDs on which the
# coders did not agree. (id.vars is spelled out instead of relying on partial
# matching of `id`.)
frdata_category_v <- melt(
  frdata_category2,
  id.vars = c("responseID", "coderID")
)

# Wide format for the IRR computation: one row per responseID and one code
# column per coderID.
frdata_category <- reshape(
  frdata_category2,
  idvar = "responseID",
  timevar = "coderID",
  direction = "wide"
)

# Drop the responseID column and transpose so each row is a coder, then
# compute Krippendorff's alpha (default method).
kripp.alpha(t(frdata_category[, -1]))


#########################################
###IRR for Topic Coding###############
#########################################

# Columns 1 and 2 are the identifier columns (responseID / coderID, used by
# name below); column 19 is the code whose agreement is measured here.
frdata_category2 <- frdatairr[, c(1, 2, 19)]

# Recode missing codes as 0 so a missing code counts as a value of its own.
# A logical mask is used rather than which(): when nothing is missing,
# which() yields an empty index and the data-frame assignment emits a
# spurious "no non-missing arguments to max" warning.
frdata_category2[is.na(frdata_category2[, 3]), 3] <- 0

# Long format, useful for checking the particular responseIDs on which the
# coders did not agree. (id.vars is spelled out instead of relying on partial
# matching of `id`.)
frdata_category_v <- melt(
  frdata_category2,
  id.vars = c("responseID", "coderID")
)

# Wide format for the IRR computation: one row per responseID and one code
# column per coderID.
frdata_category <- reshape(
  frdata_category2,
  idvar = "responseID",
  timevar = "coderID",
  direction = "wide"
)

# Drop the responseID column and transpose so each row is a coder, then
# compute Krippendorff's alpha (default method).
kripp.alpha(t(frdata_category[, -1]))




########################################################
#######Summary Statistics for the Free Response Data####
########################################################

# descriptives.csv contains the coded categories from the free response data
# (the original note refers to this data file as "reconcile2"). When coders
# disagreed about the coding, a third coder made the final decision.
descriptives <- read.csv(file = "descriptives.csv")

# What proportion of the answers had to do with participation (category 3)?
# Numerator: rows coded 3. Denominator: rows with a non-missing category.
sum(descriptives$category == 3, na.rm = TRUE) /
  sum(!is.na(descriptives$category))

##Proportions for the SubCategories

# x: number of responses per category
# y: the same counts as a percentage of all responses with a category
# a: number of responses per subcategory
x <- table(descriptives$category)
y <- x / sum(!is.na(descriptives$category)) * 100
a <- table(descriptives$subcategory)

# Summary table, 17 rows by 6 columns (all stored as text):
#   col 1  category name        (first row of each category only)
#   col 2  category count       (first row of each category only)
#   col 3  category percentage  (first row of each category only)
#   col 4  subcategory count
#   col 5  subcategory count as a percentage of its category, 2 decimals
#   col 6  subcategory name
subcategorystats <- local({
  stats <- matrix(NA_character_, nrow = 17, ncol = 6)

  # Category-level figures go on the first row of each category's block:
  # Process starts at row 1, Policy at row 7, Participation at row 15, and
  # Other occupies row 17 on its own.
  category_names <- c("Process", "Policy", "Participation", "Other")
  category_rows <- c(1, 7, 15, 17)
  for (k in seq_along(category_names)) {
    stats[category_rows[k], 1] <- category_names[k]
    stats[category_rows[k], 2] <- x[[k]]
    stats[category_rows[k], 3] <- y[[k]]
  }

  # Rows 1-16 are the subcategories. Row r takes entry r + 1 of the
  # subcategory table (entry 1 is not used). Rows 1-6 belong to category 1,
  # rows 7-14 to category 2, and rows 15-16 to category 3.
  parent_category <- rep(1:3, times = c(6, 8, 2))
  for (r in seq_len(16)) {
    n_sub <- a[[r + 1]]
    stats[r, 4] <- n_sub
    stats[r, 5] <- round((n_sub / x[[parent_category[r]]] * 100), 2)
  }

  # Names of the subcategories, in row order.
  stats[1:16, 6] <- c(
    "American Government Structure or System",
    "Government Officials",
    "Campaigns or Candidates",
    "Media",
    "Large-Scale Political Behavior",
    "Other Unspecified Part of Process",
    "Economy",
    "Environment",
    "Foreign Policy",
    "Healthcare",
    "LGBT Rights",
    "Unemployment",
    "Women's Issues",
    "Other Policy",
    "Politics and Respondent",
    "Politics and Respondent's Social Network"
  )

  stats
})

###Topics
# Count of responses in each topic, displayed as a single column.
a <- table(descriptives$topic)
topic_counts_column <- t(t(a))
topic_counts_column

# To express the counts relative to their subcategory, divide by the total
# number of responses in that subcategory: 124 for "politics and respondent"
# and 166 for "politics and respondent's social network". Both divisions are
# applied to every topic, so read only the relevant rows of each result.
n_politics_respondent <- 124
n_politics_social_network <- 166

topic_counts_column / n_politics_respondent      # look just at the codes in the 300s
topic_counts_column / n_politics_social_network  # look just at the codes in the 310s
